import pandas as pd

# Path to the combined ratings dump; being relative, it is resolved against
# the current working directory.
MOVIE_RATING_PATH = "../../data/rating_combine_20190506.csv"

# Fields in the file are delimited by "::". A separator longer than one
# character needs pandas' Python parsing engine, hence engine="python".
combine_data = pd.read_csv(
    filepath_or_buffer=MOVIE_RATING_PATH,
    sep="::",
    engine="python",
)
# Let pandas print up to 1000 rows before it truncates the displayed frame.
pd.options.display.max_rows = 1000

# Pick an arbitrary user (userId 9) and show how their ratings are
# distributed: one output row per rating value, with the row counts.
user_mask = combine_data["userId"] == 9
user_ratings = combine_data.loc[user_mask, ["userId", "movieId", "rating"]]
print(user_ratings.groupby("rating").count())
print("------------------------------------------------------")

# Pick an arbitrary movie (movieId 20) and show how its ratings are
# distributed: one output row per rating value, with the row counts.
movie_mask = combine_data["movieId"] == 20
movie_ratings = combine_data.loc[movie_mask, ["userId", "movieId", "rating"]]
print(movie_ratings.groupby("rating").count())
print("------------------------------------------------------")

# Count how many movies each user has rated.
grouped_by_user = combine_data[["userId", "movieId", "rating"]].groupby("userId")
print(grouped_by_user.count())
print("------------------------------------------------------")

# Count how many ratings each movie has received.
grouped_by_movie = combine_data[["userId", "movieId", "rating"]].groupby("movieId")
print(grouped_by_movie.count())
print("------------------------------------------------------")

# Distribution of rating activity across users.
# Step 1: per user, count the rows in each remaining column; the "rating"
# column then holds the number of ratings that user submitted.
per_user_counts = (
    combine_data[["userId", "movieId", "rating"]]
    .groupby("userId")
    .count()
)
# Step 2: group users by that number and count them, i.e. how many users
# submitted exactly N ratings.
users_per_rating_count = per_user_counts.groupby("rating").count()
print(users_per_rating_count)
print("------------------------------------------------------")